linux: User-space grant table device.
author Keir Fraser <keir@xensource.com>
Sat, 31 Mar 2007 12:53:24 +0000 (13:53 +0100)
committer Keir Fraser <keir@xensource.com>
Sat, 31 Mar 2007 12:53:24 +0000 (13:53 +0100)
A character device for accessing (in user-space) pages that have been
granted by other domains.

Signed-off-by: Derek Murray <Derek.Murray@cl.cam.ac.uk>
Fix ioctl interface to be 32/64-bit invariant. Move xen_class
mechanism to common util.c.

Signed-off-by: Keir Fraser <keir@xensource.com>
linux-2.6-xen-sparse/drivers/xen/Makefile
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile [new file with mode: 0644]
linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c [new file with mode: 0644]
linux-2.6-xen-sparse/drivers/xen/util.c
linux-2.6-xen-sparse/include/xen/driver_util.h
linux-2.6-xen-sparse/include/xen/public/gntdev.h [new file with mode: 0644]

index cfae4625c4969eb2c9c760d89bbf6d887f0fb9c0..c7d66d139eac64153d87294b2390f2be3404b44c 100644 (file)
@@ -3,6 +3,7 @@ obj-y   += console/
 obj-y  += evtchn/
 obj-y  += privcmd/
 obj-y  += xenbus/
+obj-y  += gntdev/
 
 obj-$(CONFIG_XEN_UTIL)                 += util.o
 obj-$(CONFIG_XEN_BALLOON)              += balloon/
index 586de79f77c7f14c2af16260db000cf56ce41c6c..4fdb601ebec03eb4e3b823a6086b1a20e611a449 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 #include <xen/balloon.h>
+#include <xen/driver_util.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #define MAX_TAP_DEV 256     /*the maximum number of tapdisk ring devices    */
 #define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
 
-
-struct class *xen_class;
-EXPORT_SYMBOL_GPL(xen_class);
-
-/*
- * Setup the xen class.  This should probably go in another file, but
- * since blktap is the only user of it so far, it gets to keep it.
- */
-int setup_xen_class(void)
-{
-       int ret;
-
-       if (xen_class)
-               return 0;
-
-       xen_class = class_create(THIS_MODULE, "xen");
-       if ((ret = IS_ERR(xen_class))) {
-               xen_class = NULL;
-               return ret;
-       }
-
-       return 0;
-}
-
 /*
  * The maximum number of requests that can be outstanding at any time
  * is determined by 
@@ -347,6 +324,7 @@ static const struct file_operations blktap_fops = {
 
 static tap_blkif_t *get_next_free_dev(void)
 {
+       struct class *class;
        tap_blkif_t *info;
        int minor;
 
@@ -409,9 +387,10 @@ found:
                wmb();
                tapfds[minor] = info;
 
-               class_device_create(xen_class, NULL,
-                                   MKDEV(blktap_major, minor), NULL,
-                                   "blktap%d", minor);
+               if ((class = get_xen_class()) != NULL)
+                       class_device_create(class, NULL,
+                                           MKDEV(blktap_major, minor), NULL,
+                                           "blktap%d", minor);
        }
 
 out:
@@ -1487,6 +1466,7 @@ static void make_response(blkif_t *blkif, unsigned long id,
 static int __init blkif_init(void)
 {
        int i, ret;
+       struct class *class;
 
        if (!is_running_on_xen())
                return -ENODEV;
@@ -1522,7 +1502,7 @@ static int __init blkif_init(void)
        DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
 
        /* Make sure the xen class exists */
-       if (!setup_xen_class()) {
+       if ((class = get_xen_class()) != NULL) {
                /*
                 * This will allow udev to create the blktap ctrl device.
                 * We only want to create blktap0 first.  We don't want
@@ -1530,7 +1510,7 @@ static int __init blkif_init(void)
                 * We only create the device when a request of a new device is
                 * made.
                 */
-               class_device_create(xen_class, NULL,
+               class_device_create(class, NULL,
                                    MKDEV(blktap_major, 0), NULL,
                                    "blktap0");
        } else {
diff --git a/linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile b/linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile
new file mode 100644 (file)
index 0000000..6fbcd64
--- /dev/null
@@ -0,0 +1 @@
+obj-y  := gntdev.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c b/linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c
new file mode 100644 (file)
index 0000000..fe1e98b
--- /dev/null
@@ -0,0 +1,971 @@
+/******************************************************************************
+ * gntdev.c
+ * 
+ * Device for accessing (in user-space) pages that have been granted by other
+ * domains.
+ *
+ * Copyright (c) 2006-2007, D G Murray.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <asm/atomic.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <xen/gnttab.h>
+#include <asm/hypervisor.h>
+#include <xen/balloon.h>
+#include <xen/evtchn.h>
+#include <xen/driver_util.h>
+
+#include <linux/types.h>
+#include <xen/public/gntdev.h>
+
+
+#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>"
+#define DRIVER_DESC   "User-space granted page access driver"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
+#define MAX_GRANTS 128
+
+/* A slot can be in one of three states:
+ *
+ * 0. GNTDEV_SLOT_INVALID:
+ *    This slot is not associated with a grant reference, and is therefore free
+ *    to be overwritten by a new grant reference. gntdev_open() places every
+ *    slot in this state.
+ *
+ * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
+ *    This slot is associated with a grant reference (via the 
+ *    IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
+ *    add_grant_reference() and add_grant_references() move a slot into this
+ *    state, and gntdev_mmap() refuses slots that are in any other state.
+ *
+ * 2. GNTDEV_SLOT_MAPPED:
+ *    This slot is associated with a grant reference, and has been mmap()-ed.
+ *    gntdev_mmap() sets this state once the grant has been mapped into the
+ *    kernel.
+ */
+typedef enum gntdev_slot_state {
+       GNTDEV_SLOT_INVALID = 0,
+       GNTDEV_SLOT_NOT_YET_MAPPED,
+       GNTDEV_SLOT_MAPPED
+} gntdev_slot_state_t;
+
+#define GNTDEV_INVALID_HANDLE    -1
+#define GNTDEV_FREE_LIST_INVALID -1
+/* Each opened instance of gntdev is associated with a list of grants,
+ * represented by an array of elements of the following type,
+ * gntdev_grant_info_t.
+ *
+ * Which member of the union is meaningful depends on the slot state:
+ *
+ *  - u.free_list_index is initialised by gntdev_open() for slots in the
+ *    GNTDEV_SLOT_INVALID state, and holds the position of the slot's entry
+ *    in the owning file's free_list array.
+ *
+ *  - u.valid is used once a grant reference has been stored in the slot.
+ *    domid and ref are copied from the ioctl argument; kernel_handle,
+ *    user_handle and dev_bus_addr are recorded by gntdev_mmap() from the
+ *    results of the grant-table map operations. user_handle is set to
+ *    GNTDEV_INVALID_HANDLE until a user-space mapping has been made.
+ */
+typedef struct gntdev_grant_info {
+       gntdev_slot_state_t state;
+       union {
+               uint32_t free_list_index;
+               struct {
+                       domid_t domid;
+                       grant_ref_t ref;
+                       grant_handle_t kernel_handle;
+                       grant_handle_t user_handle;
+                       uint64_t dev_bus_addr;
+               } valid;
+       } u;
+} gntdev_grant_info_t;
+
+/* Private data structure, which is stored in the file pointer for files
+ * associated with this device. gntdev_open() allocates and initialises one
+ * instance per opened file, and gntdev_release() frees it.
+ */
+typedef struct gntdev_file_private_data {
+  
+       /* Array of grant information. */
+       gntdev_grant_info_t grants[MAX_GRANTS];
+
+       /* Read/write semaphore used to protect the grants array.
+        * gntdev_mmap() holds it for writing while it checks and updates the
+        * slot states.
+        */
+       struct rw_semaphore grants_sem;
+
+       /* An array of indices of free slots in the grants array.
+        * N.B. An entry in this list may temporarily have the value
+        * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
+        * from the list by the contiguous allocator, but the list has not yet
+        * been compressed. However, this is not visible across invocations of
+        * the device.
+        */
+       int32_t free_list[MAX_GRANTS];
+       
+       /* The number of free slots in the grants array. */
+       uint32_t free_list_size;
+
+       /* Read/write semaphore used to protect the free list. */
+       struct rw_semaphore free_list_sem;
+       
+       /* Index of the next slot after the most recent contiguous allocation, 
+        * for use in a next-fit allocator.
+        */
+       uint32_t next_fit_index;
+
+       /* Used to map grants into the kernel, before mapping them into user
+        * space. Holds MAX_GRANTS pages obtained from
+        * alloc_empty_pages_and_pagevec() in gntdev_open(); the page at index
+        * N provides the kernel address for slot N (see get_kernel_vaddr()).
+        */
+       struct page **foreign_pages;
+
+} gntdev_file_private_data_t;
+
+/* Module lifecycle operations. */
+static int __init gntdev_init(void);
+static void __exit gntdev_exit(void);
+
+module_init(gntdev_init);
+module_exit(gntdev_exit);
+
+/* File operations.
+ * These are the handlers collected in gntdev_fops, which gntdev_init()
+ * passes to register_chrdev().
+ */
+static int gntdev_open(struct inode *inode, struct file *flip);
+static int gntdev_release(struct inode *inode, struct file *flip);
+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
+static int gntdev_ioctl (struct inode *inode, struct file *flip,
+                        unsigned int cmd, unsigned long arg);
+
+static struct file_operations gntdev_fops = {
+       .owner = THIS_MODULE,
+       .open = gntdev_open,
+       .release = gntdev_release,
+       .mmap = gntdev_mmap,
+       .ioctl = gntdev_ioctl
+};
+
+/* VM operations.
+ * gntdev_mmap() installs gntdev_vmops on every VM area it sets up, so that
+ * the grants can be unmapped again through these hooks.
+ */
+static void gntdev_vma_close(struct vm_area_struct *vma);
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+                             pte_t *ptep, int is_fullmm);
+
+static struct vm_operations_struct gntdev_vmops = {
+       .close = gntdev_vma_close,
+       .ptep_get_and_clear_full = gntdev_clear_pte
+};
+
+/* Global variables. */
+
+/* The driver major number, for use when unregistering the driver.
+ * Assigned from the return value of register_chrdev() in gntdev_init().
+ */
+static int gntdev_major;
+
+#define GNTDEV_NAME "gntdev"
+
+/* Memory mapping functions
+ * ------------------------
+ *
+ * Every granted page is mapped into both kernel and user space, and the two
+ * following functions return the respective virtual addresses of these pages.
+ *
+ * When shadow paging is disabled, the granted page is mapped directly into
+ * user space; when it is enabled, it is mapped into the kernel and remapped
+ * into user space using vm_insert_page() (see gntdev_mmap(), below).
+ */
+
+/* Computes the user-space virtual address of page number @page_index inside
+ * @vma: the start address of the VM area advanced by @page_index whole pages.
+ */
+static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
+                                           int page_index)
+{
+       unsigned long area_start = (unsigned long) vma->vm_start;
+
+       return area_start + (page_index << PAGE_SHIFT);
+}
+
+/* Looks up the kernel-space virtual address that backs slot @slot_index of
+ * the gntdev instance described by @priv. The address is derived from the
+ * page frame number of the matching entry in priv->foreign_pages.
+ */
+static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
+                                             int slot_index)
+{
+       struct page *slot_page = priv->foreign_pages[slot_index];
+       unsigned long pfn = page_to_pfn(slot_page);
+
+       return (unsigned long) pfn_to_kaddr(pfn);
+}
+
+/* Helper functions. */
+
+/* Records a single grant reference in the per-file grant array.
+ *
+ * The slot is taken from the tail of the free list of @flip's private data
+ * and filled with the domain id and grant reference from @op. On success 0
+ * is returned and *@offset receives the offset that must be passed to mmap()
+ * to map the grant (the slot index expressed in whole pages). If the free
+ * list is empty, -ENOMEM is returned and nothing is modified.
+ */
+static int add_grant_reference(struct file *flip,
+                              struct ioctl_gntdev_grant_ref *op,
+                              uint64_t *offset)
+{
+       gntdev_file_private_data_t *priv = flip->private_data;
+       gntdev_grant_info_t *slot;
+       uint32_t index;
+
+       if (unlikely(priv->free_list_size == 0))
+               return -ENOMEM;
+
+       /* Pop the last entry off the free list. */
+       priv->free_list_size -= 1;
+       index = priv->free_list[priv->free_list_size];
+       slot = &priv->grants[index];
+
+       /* Remember which grant this slot now stands for. */
+       slot->state = GNTDEV_SLOT_NOT_YET_MAPPED;
+       slot->u.valid.domid = op->domid;
+       slot->u.valid.ref = op->ref;
+
+       /* The slot index, scaled to pages, is the offset into the virtual
+        * "file address space" of the device.
+        */
+       *offset = index << PAGE_SHIFT;
+
+       return 0;
+}
+
+/* Stores @count grant references, taken from @ops, in the consecutive slots
+ * starting at @first_slot. The caller is expected to have obtained
+ * @first_slot from find_contiguous_free_range() during the same invocation
+ * of the driver.
+ *
+ * For every slot, its entry in the free list is overwritten with
+ * GNTDEV_FREE_LIST_INVALID before the slot itself is filled in. Always
+ * returns 0.
+ */
+static int add_grant_references(struct file *flip,
+                               int count,
+                               struct ioctl_gntdev_grant_ref *ops,
+                               uint32_t first_slot)
+{
+       gntdev_file_private_data_t *priv = flip->private_data;
+       int n = 0;
+
+       while (n < count) {
+               gntdev_grant_info_t *slot = &priv->grants[first_slot + n];
+
+               /* The free-list position must be read before the union is
+                * reused for the grant details below.
+                */
+               int list_pos = slot->u.free_list_index;
+
+               priv->free_list[list_pos] = GNTDEV_FREE_LIST_INVALID;
+
+               slot->state = GNTDEV_SLOT_NOT_YET_MAPPED;
+               slot->u.valid.domid = ops[n].domid;
+               slot->u.valid.ref = ops[n].ref;
+
+               ++n;
+       }
+
+       return 0;
+}
+
+/* Scans through the free list for @flip, removing entries that are marked as
+ * GNTDEV_FREE_LIST_INVALID. This will reduce the recorded size of the free
+ * list to the number of valid entries.
+ *
+ * Surviving entries are moved towards the start of the list. Whenever an
+ * entry moves, the free_list_index stored in the slot it names is updated to
+ * the new position, so that add_grant_references() later invalidates the
+ * correct free-list entry for that slot.
+ */
+static void compress_free_list(struct file *flip) 
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+       uint32_t i, j = 0, old_size;
+       int32_t slot_index;
+
+       old_size = private_data->free_list_size;
+       for (i = 0; i < old_size; ++i) {
+               slot_index = private_data->free_list[i];
+               if (slot_index == GNTDEV_FREE_LIST_INVALID)
+                       continue;
+
+               if (i != j) {
+                       private_data->free_list[j] = slot_index;
+                       /* Keep the slot's back-reference in step with the
+                        * new position of its free-list entry.
+                        */
+                       private_data->grants[slot_index].u.free_list_index = j;
+               }
+               ++j;
+       }
+
+       /* j now counts the entries that survived. */
+       private_data->free_list_size = j;
+}
+
+/* Scans the slots in [@begin, @end) of @private_data's grant array for a run
+ * of @num_slots consecutive slots in the GNTDEV_SLOT_INVALID state.
+ *
+ * Returns the index of the first slot of the run, or -ENOMEM if there is no
+ * such run in the given interval.
+ */
+static int scan_for_free_range(gntdev_file_private_data_t *private_data,
+                              int begin, int end, uint32_t num_slots)
+{
+       int i, range_start = 0;
+       uint32_t range_length = 0;
+
+       for (i = begin; i < end; ++i) {
+               if (private_data->grants[i].state != GNTDEV_SLOT_INVALID) {
+                       /* An occupied slot ends the current run; without this
+                        * reset, free slots separated by occupied ones would
+                        * be counted as one range.
+                        */
+                       range_length = 0;
+                       continue;
+               }
+               if (range_length == 0)
+                       range_start = i;
+               if (++range_length == num_slots)
+                       return range_start;
+       }
+
+       return -ENOMEM;
+}
+
+/* Searches the grant array in the private data of @flip for a range of
+ * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
+ *
+ * The search starts at next_fit_index (next-fit allocation); if nothing is
+ * found between there and the end of the array, the whole array is scanned
+ * from the beginning, so that a free run straddling next_fit_index is found
+ * as well.
+ *
+ * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
+ */
+static int find_contiguous_free_range(struct file *flip,
+                                     uint32_t num_slots) 
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+       int start_index = private_data->next_fit_index;
+       int range_start;
+
+       /* Not enough free slots in total, contiguous or otherwise. */
+       if (private_data->free_list_size < num_slots) {
+               return -ENOMEM;
+       }
+
+       /* First search from the start_index to the end of the array. */
+       range_start = scan_for_free_range(private_data, start_index,
+                                         MAX_GRANTS, num_slots);
+       if (range_start >= 0)
+               return range_start;
+
+       /* Now search the whole array from its start. */
+       return scan_for_free_range(private_data, 0, MAX_GRANTS, num_slots);
+}
+
+/* Interface functions. */
+
+/* Module entry point: registers the gntdev character device.
+ *
+ * Returns -ENODEV when not running on Xen and -ENOMEM when the character
+ * device cannot be registered. A failure to obtain the xen class or to
+ * create the class device is only logged: the function still returns 0 and
+ * prints the major number, so that the device node can be created manually
+ * using mknod.
+ */
+static int __init gntdev_init(void)
+{
+       struct class *xen_cls;
+       struct class_device *class_dev;
+
+       if (!is_running_on_xen()) {
+               printk(KERN_ERR "You must be running Xen to use gntdev\n");
+               return -ENODEV;
+       }
+
+       gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
+       if (gntdev_major < 0) {
+               printk(KERN_ERR "Could not register gntdev device\n");
+               return -ENOMEM;
+       }
+
+       xen_cls = get_xen_class();
+       if (xen_cls != NULL) {
+               class_dev = class_device_create(xen_cls, NULL,
+                                               MKDEV(gntdev_major, 0),
+                                               NULL, GNTDEV_NAME);
+               if (!IS_ERR(class_dev))
+                       return 0;
+               printk(KERN_ERR "Error creating gntdev device in xen_class\n");
+       } else {
+               printk(KERN_ERR "Error setting up xen_class\n");
+       }
+
+       /* The sysfs side failed, but the character device itself exists. */
+       printk(KERN_ERR "gntdev created with major number = %d\n",
+              gntdev_major);
+       return 0;
+}
+
+/* Module exit point: destroys the class device (when the xen class is
+ * available) and unregisters the character device.
+ */
+static void __exit gntdev_exit(void)
+{
+       struct class *xen_cls = get_xen_class();
+
+       if (xen_cls != NULL)
+               class_device_destroy(xen_cls, MKDEV(gntdev_major, 0));
+
+       unregister_chrdev(gntdev_major, GNTDEV_NAME);
+}
+
+/* Called when the device is opened.
+ *
+ * Takes a reference on the module, allocates the per-file private data and
+ * the pages used for kernel mappings of grants, and puts every slot on the
+ * free list in the GNTDEV_SLOT_INVALID state.
+ *
+ * Returns 0 on success, -ENODEV if no module reference could be taken, or
+ * -ENOMEM if an allocation fails. No module reference is held on failure.
+ */
+static int gntdev_open(struct inode *inode, struct file *flip)
+{
+       gntdev_file_private_data_t *private_data;
+       int i;
+
+       /* gntdev_release() drops this reference for a file that was opened
+        * successfully; the failure paths below must drop it themselves.
+        */
+       if (!try_module_get(THIS_MODULE))
+               return -ENODEV;
+
+       /* Allocate space for the per-instance private data. */
+       private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
+       if (!private_data)
+               goto nomem_out;
+
+       /* Allocate space for the kernel-mapping of granted pages. */
+       private_data->foreign_pages = 
+               alloc_empty_pages_and_pagevec(MAX_GRANTS);
+       if (!private_data->foreign_pages)
+               goto nomem_out2;
+
+       /* Initialise the free-list, which contains all slots at first.
+        * Slot i is stored at position MAX_GRANTS - i - 1, and the slot
+        * remembers that position in u.free_list_index.
+        */
+       for (i = 0; i < MAX_GRANTS; ++i) {
+               private_data->free_list[MAX_GRANTS - i - 1] = i;
+               private_data->grants[i].state = GNTDEV_SLOT_INVALID;
+               private_data->grants[i].u.free_list_index = MAX_GRANTS - i - 1;
+       }
+       private_data->free_list_size = MAX_GRANTS;
+       private_data->next_fit_index = 0;
+
+       init_rwsem(&private_data->grants_sem);
+       init_rwsem(&private_data->free_list_sem);
+
+       flip->private_data = private_data;
+
+       return 0;
+
+nomem_out2:
+       kfree(private_data);
+nomem_out:
+       /* Balance the try_module_get() above. */
+       module_put(THIS_MODULE);
+       return -ENOMEM;
+}
+
+/* Called when the device is closed. Frees the pages used for kernel
+ * mappings and the per-file private data, if present, and drops the module
+ * reference taken in gntdev_open(). Always returns 0.
+ */
+static int gntdev_release(struct inode *inode, struct file *flip)
+{
+       gntdev_file_private_data_t *priv = flip->private_data;
+
+       if (priv != NULL) {
+               if (priv->foreign_pages != NULL)
+                       free_empty_pages_and_pagevec(priv->foreign_pages,
+                                                    MAX_GRANTS);
+               kfree(priv);
+       }
+
+       module_put(THIS_MODULE);
+       return 0;
+}
+
+/* Called when an attempt is made to mmap() the device. The private data from
+ * @flip contains the list of grant references that can be mapped. The vm_pgoff
+ * field of @vma contains the index into that list that refers to the grant
+ * reference that will be mapped. Only mappings that are a multiple of
+ * PAGE_SIZE are handled.
+ *
+ * Returns 0 on success. Returns -EINVAL if the file has no private data, if
+ * a writable mapping is not shared, or if one of the requested slots is not
+ * in the GNTDEV_SLOT_NOT_YET_MAPPED state; -ENXIO if the requested range of
+ * slots is empty or exceeds MAX_GRANTS; and -ENOMEM if the per-VMA page
+ * array cannot be allocated or if mapping a grant fails.
+ *
+ * grants_sem is held for writing from the slot state check until the
+ * function returns, and is released on every exit path.
+ */
+static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma) 
+{
+       struct gnttab_map_grant_ref op;
+       unsigned long slot_index = vma->vm_pgoff;
+       unsigned long kernel_vaddr, user_vaddr;
+       uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       uint64_t ptep;
+       int ret;
+       int flags;
+       int i;
+       struct page *page;
+       gntdev_file_private_data_t *private_data = flip->private_data;
+
+       if (unlikely(!private_data)) {
+               printk(KERN_ERR "File's private data is NULL.\n");
+               return -EINVAL;
+       }
+
+       if (unlikely((size == 0) || (size + slot_index) > MAX_GRANTS)) {
+               printk(KERN_ERR "Invalid number of pages or offset"
+                      "(num_pages = %d, first_slot = %ld).\n",
+                      size, slot_index);
+               return -ENXIO;
+       }
+
+       if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+               printk(KERN_ERR "Writable mappings must be shared.\n");
+               return -EINVAL;
+       }
+
+       /* Slots must be in the NOT_YET_MAPPED state. */
+       down_write(&private_data->grants_sem);
+       for (i = 0; i < size; ++i) {
+               if (private_data->grants[slot_index + i].state != 
+                   GNTDEV_SLOT_NOT_YET_MAPPED) {
+                       printk(KERN_ERR "Slot (index = %ld) is in the wrong "
+                              "state (%d).\n", slot_index + i, 
+                              private_data->grants[slot_index + i].state);
+                       up_write(&private_data->grants_sem);
+                       return -EINVAL;
+               }
+       }
+
+       /* Install the hook for unmapping. */
+       vma->vm_ops = &gntdev_vmops;
+    
+       /* The VM area contains pages from another VM. */
+       vma->vm_flags |= VM_FOREIGN;
+
+       /* One struct page pointer per mapped page; filled in below. */
+       vma->vm_private_data = kzalloc(size * sizeof(struct page *), 
+                                      GFP_KERNEL);
+       if (vma->vm_private_data == NULL) {
+               printk(KERN_ERR "Couldn't allocate mapping structure for VM "
+                      "area.\n");
+               /* The semaphore was taken above and must not stay held. */
+               up_write(&private_data->grants_sem);
+               return -ENOMEM;
+       }
+
+       /* This flag prevents Bad PTE errors when the memory is unmapped. */
+       vma->vm_flags |= VM_RESERVED;
+
+       /* This flag prevents this VM area being copied on a fork(). A better
+        * behaviour might be to explicitly carry out the appropriate mappings
+        * on fork(), but I don't know if there's a hook for this.
+        */
+       vma->vm_flags |= VM_DONTCOPY;
+
+       /* This flag ensures that the page tables are not unpinned before the
+        * VM area is unmapped. Therefore Xen still recognises the PTE as
+        * belonging to an L1 pagetable, and the grant unmap operation will
+        * succeed, even if the process does not exit cleanly.
+        */
+       vma->vm_mm->context.has_foreign_mappings = 1;
+
+       for (i = 0; i < size; ++i) {
+
+               flags = GNTMAP_host_map;
+               if (!(vma->vm_flags & VM_WRITE))
+                       flags |= GNTMAP_readonly;
+
+               kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
+               user_vaddr = get_user_vaddr(vma, i);
+               page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
+
+               gnttab_set_map_op(&op, kernel_vaddr, flags,   
+                                 private_data->grants[slot_index+i]
+                                 .u.valid.ref, 
+                                 private_data->grants[slot_index+i]
+                                 .u.valid.domid);
+
+               /* Carry out the mapping of the grant reference. */
+               ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 
+                                               &op, 1);
+               BUG_ON(ret);
+               if (op.status) {
+                       printk(KERN_ERR "Error mapping the grant reference "
+                              "into the kernel (%d). domid = %d; ref = %d\n",
+                              op.status,
+                              private_data->grants[slot_index+i]
+                              .u.valid.domid,
+                              private_data->grants[slot_index+i]
+                              .u.valid.ref);
+                       goto undo_map_out;
+               }
+
+               /* Store a reference to the page that will be mapped into user
+                * space.
+                */
+               ((struct page **) vma->vm_private_data)[i] = page;
+
+               /* Mark mapped page as reserved. */
+               SetPageReserved(page);
+
+               /* Record the grant handle, for use in the unmap operation. */
+               private_data->grants[slot_index+i].u.valid.kernel_handle = 
+                       op.handle;
+               private_data->grants[slot_index+i].u.valid.dev_bus_addr = 
+                       op.dev_bus_addr;
+               
+               /* The slot counts as mapped from here on; the user handle
+                * stays invalid until a user-space grant mapping is made.
+                */
+               private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
+               private_data->grants[slot_index+i].u.valid.user_handle =
+                       GNTDEV_INVALID_HANDLE;
+
+               /* Now perform the mapping to user space. */
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+
+                       /* NOT USING SHADOW PAGE TABLES. */
+                       /* In this case, we map the grant(s) straight into user
+                        * space.
+                        */
+
+                       /* Get the machine address of the PTE for the user 
+                        *  page.
+                        */
+                       if ((ret = create_lookup_pte_addr(vma->vm_mm, 
+                                                         vma->vm_start 
+                                                         + (i << PAGE_SHIFT), 
+                                                         &ptep)))
+                       {
+                               printk(KERN_ERR "Error obtaining PTE pointer "
+                                      "(%d).\n", ret);
+                               goto undo_map_out;
+                       }
+                       
+                       /* Configure the map operation. */
+               
+                       /* The reference is to be used by host CPUs. */
+                       flags = GNTMAP_host_map;
+                       
+                       /* Specifies a user space mapping. */
+                       flags |= GNTMAP_application_map;
+                       
+                       /* The map request contains the machine address of the
+                        * PTE to update.
+                        */
+                       flags |= GNTMAP_contains_pte;
+                       
+                       if (!(vma->vm_flags & VM_WRITE))
+                               flags |= GNTMAP_readonly;
+
+                       gnttab_set_map_op(&op, ptep, flags, 
+                                         private_data->grants[slot_index+i]
+                                         .u.valid.ref, 
+                                         private_data->grants[slot_index+i]
+                                         .u.valid.domid);
+
+                       /* Carry out the mapping of the grant reference. */
+                       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+                                                       &op, 1);
+                       BUG_ON(ret);
+                       if (op.status) {
+                               printk(KERN_ERR "Error mapping the grant "
+                                      "reference into user space (%d). domid "
+                                      "= %d; ref = %d\n", op.status,
+                                      private_data->grants[slot_index+i].u
+                                      .valid.domid,
+                                      private_data->grants[slot_index+i].u
+                                      .valid.ref);
+                               goto undo_map_out;
+                       }
+                       
+                       /* Record the grant handle, for use in the unmap 
+                        * operation. 
+                        */
+                       private_data->grants[slot_index+i].u.
+                               valid.user_handle = op.handle;
+
+                       /* Update p2m structure with the new mapping. */
+                       set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
+                                           FOREIGN_FRAME(private_data->
+                                                         grants[slot_index+i]
+                                                         .u.valid.dev_bus_addr
+                                                         >> PAGE_SHIFT));
+               } else {
+                       /* USING SHADOW PAGE TABLES. */
+                       /* In this case, we simply insert the page into the VM
+                        * area. */
+                       ret = vm_insert_page(vma, user_vaddr, page);
+                       if (ret) {
+                               /* Do not report success for a mapping that
+                                * is missing one of its pages.
+                                */
+                               printk(KERN_ERR "Error inserting the page "
+                                      "into the VM area (%d).\n", ret);
+                               goto undo_map_out;
+                       }
+               }
+
+       }
+
+       up_write(&private_data->grants_sem);
+       return 0;
+
+undo_map_out:
+       /* If we have a mapping failure, the unmapping will be taken care of
+        * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
+        * All we need to do here is free the vma_private_data.
+        */
+       kfree(vma->vm_private_data);
+
+       /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
+        * to NULL on failure. However, we need this in gntdev_clear_pte() to
+        * unmap the grants. Therefore, we smuggle a reference to the file's
+        * private data in the VM area's private data pointer.
+        */
+       vma->vm_private_data = private_data;
+       
+       up_write(&private_data->grants_sem);
+
+       return -ENOMEM;
+}
+
+/* Clears one PTE of a gntdev VM area, tearing down the grant mapping behind
+ * it when the corresponding slot is currently mapped.
+ *
+ * @vma:       the VM area that contains @addr.
+ * @addr:      the user virtual address whose PTE is being cleared.
+ * @ptep:      the PTE to clear.
+ * @is_fullmm: passed through unchanged to pte_clear_full().
+ *
+ * Returns the value that *@ptep held on entry.
+ */
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+                             pte_t *ptep, int is_fullmm)
+{
+       gntdev_file_private_data_t *priv;
+       struct gnttab_unmap_grant_ref unmap;
+       pte_t old_pte;
+       int slot, rc;
+
+       /* Find the per-file state. A failed mmap() leaves vma->vm_file NULL
+        * (do_mmap_pgoff() clears it), and in that case the mmap handler has
+        * stashed the file's private data in vma->vm_private_data instead.
+        * Having neither is a driver bug.
+        */
+       if (vma->vm_file) {
+               priv = (gntdev_file_private_data_t *)
+                       vma->vm_file->private_data;
+       } else if (vma->vm_private_data) {
+               priv = (gntdev_file_private_data_t *)
+                       vma->vm_private_data;
+       } else {
+               priv = NULL; /* gcc warning */
+               BUG();
+       }
+
+       /* Snapshot the PTE now; it is the return value on every path. */
+       old_pte = *ptep;
+
+       /* Slot in the grants table that backs this page of the VM area. */
+       slot = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+
+       /* A slot that was never mapped (for instance when we are unwinding a
+        * failing mmap()) holds no grant: just clear the PTE and leave.
+        */
+       if (priv->grants[slot].state != GNTDEV_SLOT_MAPPED) {
+               pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+               return old_pte;
+       }
+
+       /* Step 1: remove the user-space mapping. */
+       if (priv->grants[slot].u.valid.user_handle == GNTDEV_INVALID_HANDLE ||
+           xen_feature(XENFEAT_auto_translated_physmap)) {
+               /* No user grant handle was recorded, or the guest is
+                * auto-translated: an ordinary PTE clear is sufficient.
+                */
+               pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+       } else {
+               /* The user PTE was installed through a grant-table operation,
+                * so it has to be removed through one as well.
+                */
+               gnttab_set_unmap_op(&unmap, virt_to_machine(ptep),
+                                   GNTMAP_contains_pte,
+                                   priv->grants[slot].u.valid.user_handle);
+               rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                              &unmap, 1);
+               BUG_ON(rc);
+               if (unmap.status)
+                       printk("User unmap grant status = %d\n",
+                              unmap.status);
+       }
+
+       /* Step 2: remove the kernel-space mapping of the same grant. */
+       gnttab_set_unmap_op(&unmap,
+                           get_kernel_vaddr(priv, slot),
+                           GNTMAP_host_map,
+                           priv->grants[slot].u.valid.kernel_handle);
+       rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
+       BUG_ON(rc);
+       if (unmap.status)
+               printk("Kernel unmap grant status = %d\n", unmap.status);
+
+       /* Step 3: put the slot back into the not-yet-mapped state so it can
+        * be mmap()-ed again or released by a later ioctl.
+        */
+       priv->grants[slot].state = GNTDEV_SLOT_NOT_YET_MAPPED;
+
+       /* Step 4: invalidate the physical-to-machine entry for the page. */
+       set_phys_to_machine(__pa(get_kernel_vaddr(priv, slot)) >> PAGE_SHIFT,
+                           INVALID_P2M_ENTRY);
+
+       return old_pte;
+}
+
+/* "Destructor" for a VM area: releases the allocation that
+ * vma->vm_private_data points to, if there is one.
+ */
+static void gntdev_vma_close(struct vm_area_struct *vma)
+{
+       /* kfree() does nothing when handed NULL, so no guard is required. */
+       kfree(vma->vm_private_data);
+}
+
+/* Called when an ioctl is made on the device.
+ *
+ * @inode: inode of the device node (unused here).
+ * @flip:  open file; flip->private_data holds this instance's grant table.
+ * @cmd:   one of the IOCTL_GNTDEV_* commands.
+ * @arg:   user-space pointer to the command's argument structure.
+ *
+ * Commands:
+ *   IOCTL_GNTDEV_MAP_GRANT_REF         reserve slot(s) for the given grant
+ *                                      reference(s) and return the offset to
+ *                                      pass to mmap() in op.index.
+ *   IOCTL_GNTDEV_UNMAP_GRANT_REF       return slot(s) to the free list; every
+ *                                      slot must be in the not-yet-mapped
+ *                                      state.
+ *   IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR  report the offset and page count of
+ *                                      the gntdev VM area starting at vaddr.
+ *
+ * Returns 0 on success. On failure returns -EFAULT (bad user pointer, or a
+ * vaddr that is not the start of a gntdev VM area), -EINVAL (zero count,
+ * invalid slot, or a failed copy of the refs array), -ENOMEM, -EBUSY (slot
+ * still mmap()-ed), the error propagated from the slot-allocation helpers,
+ * or -ENOIOCTLCMD for an unknown command.
+ */
+static int gntdev_ioctl(struct inode *inode, struct file *flip,
+                       unsigned int cmd, unsigned long arg)
+{
+       int rc = 0;
+       gntdev_file_private_data_t *private_data =
+               (gntdev_file_private_data_t *) flip->private_data;
+
+       switch (cmd) {
+       case IOCTL_GNTDEV_MAP_GRANT_REF:
+       {
+               struct ioctl_gntdev_map_grant_ref op;
+               /* Kernel copy of the caller's refs[] array; only used when
+                * more than one grant is requested. It is released in exactly
+                * one place (map_out) so that no error path can leak it.
+                */
+               struct ioctl_gntdev_grant_ref *refs = NULL;
+
+               down_write(&private_data->grants_sem);
+               down_write(&private_data->free_list_sem);
+
+               if ((rc = copy_from_user(&op, (void __user *) arg,
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto map_out;
+               }
+               /* op.count is unsigned, so zero is the only invalid value. */
+               if (unlikely(op.count == 0)) {
+                       rc = -EINVAL;
+                       goto map_out;
+               }
+
+               if (op.count == 1) {
+                       /* The single reference travelled inside op itself. */
+                       if ((rc = add_grant_reference(flip, &op.refs[0],
+                                                     &op.index)) < 0) {
+                               printk(KERN_ERR "Adding grant reference "
+                                      "failed (%d).\n", rc);
+                               goto map_out;
+                       }
+               } else {
+                       struct ioctl_gntdev_grant_ref *u;
+
+                       refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
+                       if (!refs) {
+                               rc = -ENOMEM;
+                               goto map_out;
+                       }
+                       /* User-space address of the refs[] array that trails
+                        * the fixed part of the argument structure.
+                        */
+                       u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
+                       if ((rc = copy_from_user(refs,
+                                                (void __user *)u,
+                                                sizeof(*refs) * op.count))) {
+                               printk(KERN_ERR "Copying refs from user failed"
+                                      " (%d).\n", rc);
+                               /* NOTE(review): the other copy failures in
+                                * this function report -EFAULT; -EINVAL is
+                                * kept here so that existing callers see the
+                                * same errno as before.
+                                */
+                               rc = -EINVAL;
+                               goto map_out;
+                       }
+                       if ((rc = find_contiguous_free_range(flip, op.count))
+                           < 0) {
+                               printk(KERN_ERR "Finding contiguous range "
+                                      "failed (%d).\n", rc);
+                               goto map_out;
+                       }
+                       /* rc is the first slot of the range; the mmap()
+                        * offset handed back to the caller is in bytes.
+                        */
+                       op.index = rc << PAGE_SHIFT;
+                       if ((rc = add_grant_references(flip, op.count,
+                                                      refs, rc))) {
+                               printk(KERN_ERR "Adding grant references "
+                                      "failed (%d).\n", rc);
+                               goto map_out;
+                       }
+                       compress_free_list(flip);
+               }
+               if ((rc = copy_to_user((void __user *) arg,
+                                      &op,
+                                      sizeof(op)))) {
+                       printk(KERN_ERR "Copying result back to user failed "
+                              "(%d)\n", rc);
+                       rc = -EFAULT;
+                       goto map_out;
+               }
+       map_out:
+               /* kfree(NULL) is a no-op, which covers the single-reference
+                * path and every failure before the allocation.
+                */
+               kfree(refs);
+               up_write(&private_data->grants_sem);
+               up_write(&private_data->free_list_sem);
+               return rc;
+       }
+       case IOCTL_GNTDEV_UNMAP_GRANT_REF:
+       {
+               struct ioctl_gntdev_unmap_grant_ref op;
+               int i, start_index;
+
+               down_write(&private_data->grants_sem);
+               down_write(&private_data->free_list_sem);
+
+               if ((rc = copy_from_user(&op,
+                                        (void __user *) arg,
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto unmap_out;
+               }
+
+               /* op.index is the byte offset returned by the map ioctl. */
+               start_index = op.index >> PAGE_SHIFT;
+
+               /* NOTE(review): start_index and op.count come straight from
+                * user space and are not checked against the size of
+                * private_data->grants before the loops below index it. The
+                * array bound is declared outside this part of the file, so
+                * the range check still has to be added by someone who can
+                * see it -- TODO confirm and fix.
+                */
+
+               /* First, check that all pages are in the NOT_YET_MAPPED
+                * state.
+                */
+               for (i = 0; i < op.count; ++i) {
+                       if (unlikely
+                           (private_data->grants[start_index + i].state
+                            != GNTDEV_SLOT_NOT_YET_MAPPED)) {
+                               if (private_data->grants[start_index + i].state
+                                   == GNTDEV_SLOT_INVALID) {
+                                       printk(KERN_ERR
+                                              "Tried to remove an invalid "
+                                              "grant at offset 0x%x.\n",
+                                              (start_index + i)
+                                              << PAGE_SHIFT);
+                                       rc = -EINVAL;
+                               } else {
+                                       printk(KERN_ERR
+                                              "Tried to remove a grant which "
+                                              "is currently mmap()-ed at "
+                                              "offset 0x%x.\n",
+                                              (start_index + i)
+                                              << PAGE_SHIFT);
+                                       rc = -EBUSY;
+                               }
+                               goto unmap_out;
+                       }
+               }
+
+               /* Unmap pages and add them to the free list.
+                */
+               for (i = 0; i < op.count; ++i) {
+                       private_data->grants[start_index+i].state =
+                               GNTDEV_SLOT_INVALID;
+                       private_data->grants[start_index+i].u.free_list_index =
+                               private_data->free_list_size;
+                       private_data->free_list[private_data->free_list_size] =
+                               start_index + i;
+                       ++private_data->free_list_size;
+               }
+               compress_free_list(flip);
+
+       unmap_out:
+               up_write(&private_data->grants_sem);
+               up_write(&private_data->free_list_sem);
+               return rc;
+       }
+       case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+       {
+               struct ioctl_gntdev_get_offset_for_vaddr op;
+               struct vm_area_struct *vma;
+               unsigned long vaddr;
+
+               if ((rc = copy_from_user(&op,
+                                        (void __user *) arg,
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto get_offset_out;
+               }
+               vaddr = (unsigned long)op.vaddr;
+
+               /* The VM area list must not change while it is inspected. */
+               down_read(&current->mm->mmap_sem);
+               vma = find_vma(current->mm, vaddr);
+               if (vma == NULL) {
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               /* Only VM areas created by this driver are acceptable. */
+               if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
+                       printk(KERN_ERR "The vaddr specified does not belong "
+                              "to a gntdev instance: %#lx\n", vaddr);
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               if (vma->vm_start != vaddr) {
+                       printk(KERN_ERR "The vaddr specified in an "
+                              "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
+                              "the start of the VM area. vma->vm_start = "
+                              "%#lx; vaddr = %#lx\n",
+                              vma->vm_start, vaddr);
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               op.offset = vma->vm_pgoff << PAGE_SHIFT;
+               op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+               /* Drop mmap_sem before touching user memory again. */
+               up_read(&current->mm->mmap_sem);
+               if ((rc = copy_to_user((void __user *) arg,
+                                      &op,
+                                      sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto get_offset_out;
+               }
+               goto get_offset_out;
+       get_offset_unlock_out:
+               up_read(&current->mm->mmap_sem);
+       get_offset_out:
+               return rc;
+       }
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       return 0;
+}
index 9d32712ff710fc1a7d8de01d807c5c5ee3529cf4..922c201abd1ccd9d52e03b3cc325b253a687ae17 100644 (file)
@@ -5,6 +5,23 @@
 #include <asm/uaccess.h>
 #include <xen/driver_util.h>
 
+/* Returns the shared "xen" device class, creating it on first use.
+ *
+ * Returns NULL if the class could not be created. Nothing is cached in that
+ * case, so a later call attempts the creation again.
+ */
+struct class *get_xen_class(void)
+{
+       static struct class *xen_class;
+       struct class *created;
+
+       if (!xen_class) {
+               created = class_create(THIS_MODULE, "xen");
+               if (IS_ERR(created))
+                       printk("Failed to create xen sysfs class.\n");
+               else
+                       xen_class = created;
+       }
+
+       return xen_class;
+}
+EXPORT_SYMBOL_GPL(get_xen_class);
+
 static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
 {
        /* apply_to_page_range() does all the hard work. */
index 766f651d543b07ff03c267594c5de49ecc356c99..1c3bdd235795cc141601c14b6e4143a521785174 100644 (file)
@@ -3,9 +3,12 @@
 #define __ASM_XEN_DRIVER_UTIL_H__
 
 #include <linux/vmalloc.h>
+#include <linux/device.h>
 
 /* Allocate/destroy a 'vmalloc' VM area. */
 extern struct vm_struct *alloc_vm_area(unsigned long size);
 extern void free_vm_area(struct vm_struct *area);
 
+extern struct class *get_xen_class(void);
+
 #endif /* __ASM_XEN_DRIVER_UTIL_H__ */
diff --git a/linux-2.6-xen-sparse/include/xen/public/gntdev.h b/linux-2.6-xen-sparse/include/xen/public/gntdev.h
new file mode 100644 (file)
index 0000000..c95e46b
--- /dev/null
@@ -0,0 +1,105 @@
+/******************************************************************************
+ * gntdev.h
+ * 
+ * Interface to /dev/xen/gntdev.
+ * 
+ * Copyright (c) 2007, D G Murray
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTDEV_H__
+#define __LINUX_PUBLIC_GNTDEV_H__
+
+/*
+ * Identifies a single grant as a (domain, grant reference) pair. This is the
+ * element type of the @refs array in struct ioctl_gntdev_map_grant_ref.
+ */
+struct ioctl_gntdev_grant_ref {
+       /* The domain ID of the grant to be mapped. */
+       uint32_t domid;
+       /* The grant reference of the grant to be mapped. */
+       uint32_t ref;
+};
+
+/*
+ * Inserts the grant references into the mapping table of an instance
+ * of gntdev. N.B. This does not perform the mapping, which is deferred
+ * until mmap() is called with @index as the offset.
+ *
+ * The size of the argument structure is encoded in the ioctl number, so
+ * the structure layout is part of the user/kernel interface.
+ */
+#define IOCTL_GNTDEV_MAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
+struct ioctl_gntdev_map_grant_ref {
+       /* IN parameters */
+       /* The number of grants to be mapped. */
+       uint32_t count;
+       /* Explicit padding; keeps @index at the same offset on 32- and
+        * 64-bit builds.
+        */
+       uint32_t pad;
+       /* OUT parameters */
+       /* The offset to be used on a subsequent call to mmap(). */
+       uint64_t index;
+       /* Variable IN parameter. */
+       /* Array of grant references, of size @count. Declared with a single
+        * element; when @count is greater than one, the caller's buffer must
+        * extend past the end of this structure to hold all @count entries.
+        */
+       struct ioctl_gntdev_grant_ref refs[1];
+};
+
+/*
+ * Removes the grant references from the mapping table of an instance
+ * of gntdev. N.B. munmap() must be called on the relevant virtual address(es)
+ * before this ioctl is called, or an error will result: the driver fails
+ * with -EBUSY if any page in the range is still mmap()-ed, and with -EINVAL
+ * if any page in the range does not hold a grant.
+ */
+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))       
+struct ioctl_gntdev_unmap_grant_ref {
+       /* IN parameters */
+       /* The offset that was returned by the corresponding map operation. */
+       uint64_t index;
+       /* The number of pages to be unmapped. */
+       uint32_t count;
+       /* Explicit padding; rounds the structure up to a multiple of 8
+        * bytes.
+        */
+       uint32_t pad;
+};
+
+/*
+ * Returns the offset in the driver's address space that corresponds
+ * to @vaddr. This can be used to perform a munmap(), followed by an
+ * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
+ * the caller. The number of pages that were allocated at the same time as
+ * @vaddr is returned in @count.
+ *
+ * N.B. Where more than one page has been mapped into a contiguous range, the
+ *      supplied @vaddr must correspond to the start of the range; otherwise
+ *      an error will result. It is only possible to munmap() the entire
+ *      contiguously-allocated range at once, and not any subrange thereof.
+ *
+ *      The driver also fails the request if @vaddr does not lie in a VM
+ *      area that belongs to gntdev.
+ */
+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
+_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
+struct ioctl_gntdev_get_offset_for_vaddr {
+       /* IN parameters */
+       /* The virtual address of the first mapped page in a range. Carried
+        * as a 64-bit integer rather than a pointer so that the layout does
+        * not depend on the caller's pointer size.
+        */
+       uint64_t vaddr;
+       /* OUT parameters */
+       /* The offset that was used in the initial mmap() operation. */
+       uint64_t offset;
+       /* The number of pages mapped in the VM area that begins at @vaddr. */
+       uint32_t count;
+       /* Explicit padding; rounds the structure up to a multiple of 8
+        * bytes.
+        */
+       uint32_t pad;
+};
+
+#endif /* __LINUX_PUBLIC_GNTDEV_H__ */